Online-Academy
Look, Read, Understand, Apply

Data Mining And Data Warehousing

Bayesian Classifier - Concept

def support_count(itemset, transactions):
    """Return how many transactions contain every item of *itemset*.

    This is the (absolute) support count used by Apriori-style algorithms.

    Args:
        itemset: Iterable of items that must all be present.
        transactions: Iterable of transactions, each an iterable of items.

    Returns:
        The number of transactions that are supersets of ``itemset``.
        An empty ``itemset`` is contained in every transaction.
    """
    required = set(itemset)
    return sum(1 for transaction in transactions
               if required.issubset(transaction))


if __name__ == "__main__":
    # Each transaction is modelled as a set of purchased items.
    transactions = [
        {'milk', 'bread', 'butter', 'beer'},
        {'beer', 'bread'},
        {'milk', 'bread', 'beer', 'butter'},
        {'bread', 'butter'},
    ]
    # Example: membership test for a single item.
    print('milk' in transactions[0])  # True

    # Example: subset check (the core operation in Apriori).
    itemset = {'beer', 'bread'}
    print(itemset.issubset(transactions[2]))  # True

    # Support count of the itemset across all transactions.
    print(itemset, ": ", support_count(itemset, transactions))

Points close to a given point


import math
# Find the points that lie near a given point.
def euclidean_distance(p, q):
    """Return the Euclidean distance between the 2-D points *p* and *q*."""
    return math.sqrt((p[0] - q[0])**2 + (p[1] - q[1])**2)


def points_within_radius(center, points, radius):
    """Find the points lying within *radius* of *center*.

    Args:
        center: The reference point as an ``(x, y)`` pair.
        points: Iterable of candidate ``(x, y)`` points.
        radius: Inclusive distance threshold.

    Returns:
        A ``(close_points, distances)`` tuple. ``close_points`` starts with
        ``center`` itself, followed by every candidate whose distance is
        ``<= radius`` in input order. ``distances`` holds the distance of
        every candidate, in input order.
    """
    points = list(points)
    distances = [euclidean_distance(point, center) for point in points]
    # The reference point is always reported as a member of its own group.
    close_points = [center]
    close_points.extend(point for point, distance in zip(points, distances)
                        if distance <= radius)
    return close_points, distances


def centroid(points):
    """Return the ``(mean_x, mean_y)`` of a non-empty collection of points.

    Raises:
        ValueError: If *points* is empty.
    """
    points = list(points)
    if not points:
        raise ValueError("centroid() requires at least one point")
    xs, ys = zip(*points)
    return sum(xs) / len(xs), sum(ys) / len(ys)


if __name__ == "__main__":
    given_point = (2, 3)

    points = [
        (1, 2),
        (3, 4),
        (5, 6),
        (2.1, 3.2),
        (10, 10),
    ]

    radius = 1.5  # a point counts as close when its distance is <= radius

    close_points, distances = points_within_radius(given_point, points, radius)
    print("Points closer to given point:", close_points)
    print("Distances: ", distances)

    # Mean position of all candidate points (not only the close ones).
    mean_x, mean_y = centroid(points)
    print(f"x:{mean_x}, y: {mean_y}")

Data Preprocessing

import pandas as pd
from sklearn.preprocessing import MinMaxScaler
if __name__ == "__main__":
    # Small student table; one Age entry is deliberately missing.
    records = {
        'Age': [15, 16, None, 15, 34, 23, 24, 25, 28, 12, 14, 15],
        'Gender': ['Male', 'Female', 'Female', 'Male', 'Female', 'Male',
                   'Female', 'Male', 'Female', 'Male', 'Female', 'Male'],
        'Marks': [85, 90, 88, 85, 45, 56, 67, 78, 98, 12, 23, 45],
    }

    df = pd.DataFrame(records)
    print(df)

    # Impute the missing Age with the mean of the known ages.
    df = df.fillna({"Age": df['Age'].mean()})
    print(df)

    # Remove rows that repeat an earlier row.
    df = df.drop_duplicates()

    # Encode Gender numerically: Male -> 1, Female -> 0.
    gender_codes = {'Male': 1, 'Female': 0}
    df['Gender'] = df['Gender'].map(gender_codes)
    print(df)

    # Min-max normalisation of both numeric columns (Age and Marks).
    numeric_columns = ['Age', 'Marks']
    df[numeric_columns] = MinMaxScaler().fit_transform(df[numeric_columns])
    print(df)